Context Navigation

source: lib/misc.c @ c11b68a

Last change on this file since c11b68a was fca4683, checked in by dequis <dx@…>, at 2016-11-12T03:38:34Z
word_wrap: truncate utf8 safely
Property mode set to `100644`
File size: 17.6 KB

Line
1	/********************************************************************\
2	* BitlBee -- An IRC to other IM-networks gateway *
3	* *
4	* Copyright 2002-2012 Wilmer van der Gaast and others *
5	\********************************************************************/
6
7	/*
8	* Various utility functions. Some are copied from Gaim to support the
9	* IM-modules, most are from BitlBee.
10	*
11	* Copyright (C) 1998-1999, Mark Spencer <markster@marko.net>
12	* (and possibly other members of the Gaim team)
13	* Copyright 2002-2012 Wilmer van der Gaast <wilmer@gaast.net>
14	*/
15
16	/*
17	This program is free software; you can redistribute it and/or modify
18	it under the terms of the GNU General Public License as published by
19	the Free Software Foundation; either version 2 of the License, or
20	(at your option) any later version.
21
22	This program is distributed in the hope that it will be useful,
23	but WITHOUT ANY WARRANTY; without even the implied warranty of
24	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25	GNU General Public License for more details.
26
27	You should have received a copy of the GNU General Public License with
28	the Debian GNU/Linux distribution in /usr/share/common-licenses/GPL;
29	if not, write to the Free Software Foundation, Inc., 51 Franklin St.,
30	Fifth Floor, Boston, MA 02110-1301 USA
31	*/
32
33	#define BITLBEE_CORE
34	#include "nogaim.h"
35	#include "base64.h"
36	#include "md5.h"
37	#include <stdio.h>
38	#include <stdlib.h>
39	#include <string.h>
40	#include <ctype.h>
41	#include <glib.h>
42	#include <time.h>
43
44	#ifdef HAVE_RESOLV_A
45	#include <arpa/nameser.h>
46	#include <resolv.h>
47	#endif
48
49	#include "md5.h"
50	#include "ssl_client.h"
51
52	void strip_linefeed(gchar *text)
53	{
54	int i, j;
55	gchar *text2 = g_malloc(strlen(text) + 1);
56
57	for (i = 0, j = 0; text[i]; i++) {
58	if (text[i] != '\r') {
59	text2[j++] = text[i];
60	}
61	}
62	text2[j] = '\0';
63
64	strcpy(text, text2);
65	g_free(text2);
66	}
67
/* Convert calendar fields to a time_t using mktime().
 *
 * year is the full year (1900 is subtracted here) and month is 1-based.
 * A negative sec is replaced by the seconds part of the current time.
 * All other struct tm fields, including tm_isdst, are left at zero. */
time_t get_time(int year, int month, int day, int hour, int min, int sec)
{
	struct tm when;

	if (sec < 0) {
		sec = time(NULL) % 60;
	}

	memset(&when, 0, sizeof(when));
	when.tm_sec = sec;
	when.tm_min = min;
	when.tm_hour = hour;
	when.tm_mday = day;
	when.tm_mon = month - 1;
	when.tm_year = year - 1900;

	return mktime(&when);
}
82
/* Interpret *tp as UTC and return the matching timestamp.
 *
 * mktime() treats its argument as local time, so its result is off by the
 * local UTC offset. Instead of changing TZ (kludgy) or using timegm() (not
 * portable), the offset is measured by converting the first result back with
 * gmtime_r() and running that through mktime() again; the difference between
 * both results is then applied as a correction.
 *
 * This is a hack and it will go wrong around DST changes. Note that tp is
 * modified: tm_isdst is set to -1 and mktime() is called on it. */
time_t mktime_utc(struct tm *tp)
{
	struct tm as_utc;
	time_t stamp, round_trip;

	tp->tm_isdst = -1;
	stamp = mktime(tp);

	gmtime_r(&stamp, &as_utc);
	as_utc.tm_isdst = -1;

	/* Same hour and minute both ways: local time is UTC, nothing to fix. */
	if (as_utc.tm_hour != tp->tm_hour || as_utc.tm_min != tp->tm_min) {
		round_trip = mktime(&as_utc);
		stamp += stamp - round_trip;
	}

	return stamp;
}
118
/* One HTML character entity: code is the entity name without the leading
 * '&' and trailing ';', is holds the replacement text (NUL-terminated). */
typedef struct htmlentity {
	char code[7];
	char is[3];
} htmlentity_t;

/* Entities known to strip_html(). The list ends with an empty code. */
static const htmlentity_t ent[] =
{
	{ "lt",     "<" },
	{ "gt",     ">" },
	{ "amp",    "&" },
	{ "apos",   "'" },
	{ "quot",   "\"" },
	{ "aacute", "á" },
	{ "eacute", "é" },
	{ "iacute", "í" },
	{ "oacute", "ó" },
	{ "uacute", "ú" },
	{ "agrave", "à" },
	{ "egrave", "è" },
	{ "igrave", "ì" },
	{ "ograve", "ò" },
	{ "ugrave", "ù" },
	{ "acirc",  "â" },
	{ "ecirc",  "ê" },
	{ "icirc",  "î" },
	{ "ocirc",  "ô" },
	{ "ucirc",  "û" },
	{ "auml",   "ä" },
	{ "euml",   "ë" },
	{ "iuml",   "ï" },
	{ "ouml",   "ö" },
	{ "uuml",   "ü" },
	{ "nbsp",   " " },
	{ "",       "" }
};
154
155	void strip_html(char *in)
156	{
157	char *start = in;
158	char out[strlen(in) + 1];
159	char s = out, cs;
160	int i, matched;
161	int taglen;
162
163	memset(out, 0, sizeof(out));
164
165	while (*in) {
166	if (in == '<' && (g_ascii_isalpha((in + 1)) \|\| *(in + 1) == '/')) {
167	/* If in points at a < and in+1 points at a letter or a slash, this is probably
168	a HTML-tag. Try to find a closing > and continue there. If the > can't be
169	found, assume that it wasn't a HTML-tag after all. */
170
171	cs = in;
172
173	while (in && in != '>') {
174	in++;
175	}
176
177	taglen = in - cs - 1; /* not <0 because the above loop runs at least once */
178	if (*in) {
179	if (g_strncasecmp(cs + 1, "b", taglen) == 0) {
180	*(s++) = '\x02';
181	} else if (g_strncasecmp(cs + 1, "/b", taglen) == 0) {
182	*(s++) = '\x02';
183	} else if (g_strncasecmp(cs + 1, "i", taglen) == 0) {
184	*(s++) = '\x1f';
185	} else if (g_strncasecmp(cs + 1, "/i", taglen) == 0) {
186	*(s++) = '\x1f';
187	} else if (g_strncasecmp(cs + 1, "br", taglen) == 0) {
188	*(s++) = '\n';
189	} else if (g_strncasecmp(cs + 1, "br/", taglen) == 0) {
190	*(s++) = '\n';
191	} else if (g_strncasecmp(cs + 1, "br /", taglen) == 0) {
192	*(s++) = '\n';
193	}
194	in++;
195	} else {
196	in = cs;
197	(s++) = (in++);
198	}
199	} else if (*in == '&') {
200	cs = ++in;
201	while (in && g_ascii_isalpha(in)) {
202	in++;
203	}
204
205	if (*in == ';') {
206	in++;
207	}
208	matched = 0;
209
210	for (i = 0; *ent[i].code; i++) {
211	if (g_strncasecmp(ent[i].code, cs, strlen(ent[i].code)) == 0) {
212	int j;
213
214	for (j = 0; ent[i].is[j]; j++) {
215	*(s++) = ent[i].is[j];
216	}
217
218	matched = 1;
219	break;
220	}
221	}
222
223	/* None of the entities were matched, so return the string */
224	if (!matched) {
225	in = cs - 1;
226	(s++) = (in++);
227	}
228	} else {
229	(s++) = (in++);
230	}
231	}
232
233	strcpy(start, out);
234	}
235
236	char escape_html(const char html)
237	{
238	const char *c = html;
239	GString *ret;
240	char *str;
241
242	if (html == NULL) {
243	return(NULL);
244	}
245
246	ret = g_string_new("");
247
248	while (*c) {
249	switch (*c) {
250	case '&':
251	ret = g_string_append(ret, "&");
252	break;
253	case '<':
254	ret = g_string_append(ret, "<");
255	break;
256	case '>':
257	ret = g_string_append(ret, ">");
258	break;
259	case '"':
260	ret = g_string_append(ret, """);
261	break;
262	default:
263	ret = g_string_append_c(ret, *c);
264	}
265	c++;
266	}
267
268	str = ret->str;
269	g_string_free(ret, FALSE);
270	return(str);
271	}
272
/* Value of one hexadecimal digit, or -1 if c is not a hex digit. */
static int http_hex_digit(char c)
{
	if (c >= '0' && c <= '9') {
		return c - '0';
	} else if (c >= 'a' && c <= 'f') {
		return c - 'a' + 10;
	} else if (c >= 'A' && c <= 'F') {
		return c - 'A' + 10;
	}
	return -1;
}

/* Decode%20a%20file%20name
 *
 * Replaces every %XX escape in s by the byte it encodes, in place (the
 * decoded form is never longer than the input). If a '%' is not followed
 * by exactly two hexadecimal digits, the input is considered malformed and
 * s is set to the empty string. */
void http_decode(char *s)
{
	const char *in = s;
	char *out = s;

	while (*in) {
		int hi, lo;

		if (*in != '%') {
			*out++ = *in++;
			continue;
		}

		/* in[1] is checked first, so in[2] is only read when in[1]
		   is not the terminator. */
		if ((hi = http_hex_digit(in[1])) < 0 ||
		    (lo = http_hex_digit(in[2])) < 0) {
			*s = '\0';
			return;
		}

		*out++ = (char) ((hi << 4) | lo);
		in += 3;
	}
	*out = '\0';
}
299
/* Percent-encode a string in place.
 *
 * Warning: This one explodes the string. Every byte other than A-Z, a-z,
 * 0-9 and "._-~" becomes "%XX" (uppercase hex), so the worst case is 3x the
 * original size. The caller must make sure the buffer behind s can hold
 * 3 * strlen(s) + 1 bytes.
 *
 * The encoded length is computed first and the string is then rewritten
 * from its end towards its start, so no temporary copy is needed. */
void http_encode(char *s)
{
	static const char hex[] = "0123456789ABCDEF";
	size_t len = 0, extra = 0;
	char *src, *dst;

	/* Plain range checks on purpose: the <ctype.h> classifiers depend on
	   the locale. */
#define HTTP_UNRESERVED(c) \
	(((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
	 ((c) >= '0' && (c) <= '9') || \
	 (c) == '.' || (c) == '_' || (c) == '-' || (c) == '~')

	for (src = s; *src; src++, len++) {
		if (!HTTP_UNRESERVED(*src)) {
			extra += 2;
		}
	}

	src = s + len;
	dst = src + extra;
	*dst = '\0';

	/* dst stays at or behind src, so unread input is never overwritten. */
	while (src > s) {
		unsigned char c = (unsigned char) *--src;

		if (HTTP_UNRESERVED(c)) {
			*--dst = (char) c;
		} else {
			*--dst = hex[c & 0x0f];
			*--dst = hex[c >> 4];
			*--dst = '%';
		}
	}
#undef HTTP_UNRESERVED
}
322
/* Replace every '\n' and '\r' in source by a space. Modifies the string
 * passed to it and returns that same pointer. */
char *strip_newlines(char *source)
{
	char *p;

	for (p = source; *p != '\0'; p++) {
		if (*p == '\n' || *p == '\r') {
			*p = ' ';
		}
	}

	return source;
}
336
/* Wrap an IPv4 address into IPv6 space ("::ffff:a.b.c.d").
 *
 * If src contains anything besides digits and dots it is returned as is.
 * Otherwise the result lives in a static buffer that is overwritten by the
 * next call, so this is not thread-safe. */
char *ipv6_wrap(char *src)
{
	static char dst[64];

	/* Hmm, it's not even an IP... */
	if (src[strspn(src, "0123456789.")] != '\0') {
		return src;
	}

	g_snprintf(dst, sizeof(dst), "::ffff:%s", src);

	return dst;
}
358
/* Unwrap an IPv4 address from IPv6 space. Thread-safe, because it's very simple. :-)
 *
 * If src starts with "::ffff:" (case-insensitive) and the rest consists only
 * of digits and dots, a pointer to that rest (inside src) is returned.
 * In every other case src itself is returned. */
char *ipv6_unwrap(char *src)
{
	char *v4;

	if (g_strncasecmp(src, "::ffff:", 7) != 0) {
		return src;
	}

	v4 = src + 7;

	/* Hmm, it's not even an IP... */
	if (v4[strspn(v4, "0123456789.")] != '\0') {
		return src;
	}

	return v4;
}
381
382	/* Convert from one charset to another.
383
384	from_cs, to_cs: Source and destination charsets
385	src, dst: Source and destination strings
386	size: Size if src. 0 == use strlen(). strlen() is not reliable for UNICODE/UTF16 strings though.
387	maxbuf: Maximum number of bytes to write to dst
388
389	Returns the number of bytes written to maxbuf or -1 on an error.
390	*/
391	signed int do_iconv(char from_cs, char to_cs, char src, char dst, size_t size, size_t maxbuf)
392	{
393	GIConv cd;
394	size_t res;
395	size_t inbytesleft, outbytesleft;
396	char *inbuf = src;
397	char *outbuf = dst;
398
399	cd = g_iconv_open(to_cs, from_cs);
400	if (cd == (GIConv) - 1) {
401	return -1;
402	}
403
404	inbytesleft = size ? size : strlen(src);
405	outbytesleft = maxbuf - 1;
406	res = g_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
407	*outbuf = '\0';
408	g_iconv_close(cd);
409
410	if (res != 0) {
411	return -1;
412	} else {
413	return outbuf - dst;
414	}
415	}
416
417	/* A wrapper for /dev/urandom.
418	* If /dev/urandom is not present or not usable, it calls abort()
419	* to prevent bitlbee from working without a decent entropy source */
420	void random_bytes(unsigned char *buf, int count)
421	{
422	int fd;
423
424	if (((fd = open("/dev/urandom", O_RDONLY)) == -1) \|\|
425	(read(fd, buf, count) == -1)) {
426	log_message(LOGLVL_ERROR, "/dev/urandom not present - aborting");
427	abort();
428	}
429
430	close(fd);
431	}
432
/* Tell whether value looks like a boolean setting.
 *
 * Returns 1 for "true", "yes", "on", "false", "no", "off" (any case) and
 * for strings made up of ASCII digits only; 0 for the empty string and
 * everything else. */
int is_bool(char *value)
{
	static const char *const words[] = {
		"true", "yes", "on", "false", "no", "off"
	};
	const char *p;
	size_t w;

	if (*value == 0) {
		return 0;
	}

	for (w = 0; w < sizeof(words) / sizeof(words[0]); w++) {
		if (g_strcasecmp(value, words[w]) == 0) {
			return 1;
		}
	}

	/* Not a keyword: accept it only if every character is a digit. */
	for (p = value; *p; p++) {
		if (!g_ascii_isdigit(*p)) {
			return 0;
		}
	}

	return 1;
}
458
/* Convert a boolean setting to an int.
 *
 * "true", "yes" and "on" (any case) give 1; "false", "no" and "off" give 0.
 * Anything else is parsed with sscanf("%d") and that number is returned,
 * or 0 if no number could be read. */
int bool2int(char *value)
{
	static const struct {
		const char *word;
		int result;
	} words[] = {
		{ "true", 1 }, { "yes", 1 }, { "on", 1 },
		{ "false", 0 }, { "no", 0 }, { "off", 0 },
	};
	size_t w;
	int number;

	for (w = 0; w < sizeof(words) / sizeof(words[0]); w++) {
		if (g_strcasecmp(value, words[w].word) == 0) {
			return words[w].result;
		}
	}

	return (sscanf(value, "%d", &number) == 1) ? number : 0;
}
478
/* Look up the SRV records for _service._protocol.domain.
 *
 * Returns a NULL-terminated, newly allocated array of replies (each element
 * and the array itself are allocated with GLib; see srv_free()), or NULL if
 * the query failed, produced no usable records, or the resolver is not
 * compiled in (HAVE_RESOLV_A undefined). */
struct ns_srv_reply **srv_lookup(char *service, char *protocol, char *domain)
{
	struct ns_srv_reply **replies = NULL;

#ifdef HAVE_RESOLV_A
	struct ns_srv_reply *reply = NULL;
	char query[1024];
	unsigned char querybuf[1024];
	const unsigned char *buf;
	ns_msg nsh;
	ns_rr rr;
	int n, len, size;

	g_snprintf(query, sizeof(query), "_%s._%s.%s", service, protocol, domain);

	if ((size = res_query(query, ns_c_in, ns_t_srv, querybuf, sizeof(querybuf))) <= 0) {
		return NULL;
	}

	if (ns_initparse(querybuf, size, &nsh) != 0) {
		return NULL;
	}

	n = 0;
	while (ns_parserr(&nsh, ns_s_an, n, &rr) == 0) {
		char target[NS_MAXDNAME];

		/* Three 16-bit fields (read below) plus at least one byte
		   of target name. */
		if (ns_rr_rdlen(rr) < 7) {
			break;
		}

		buf = ns_rr_rdata(rr);

		if (dn_expand(querybuf, querybuf + size, &buf[6], target, NS_MAXDNAME) == -1) {
			break;
		}

		len = strlen(target) + 1;

		reply = g_malloc(sizeof(struct ns_srv_reply) + len);
		memcpy(reply->name, target, len);

		reply->prio = (buf[0] << 8) | buf[1];
		reply->weight = (buf[2] << 8) | buf[3];
		reply->port = (buf[4] << 8) | buf[5];

		n++;
		/* Always keep one spare slot for the terminating NULL. */
		replies = g_renew(struct ns_srv_reply *, replies, n + 1);
		replies[n - 1] = reply;
	}
	if (replies) {
		replies[n] = NULL;
	}
#endif

	return replies;
}
536
/* Free a NULL-terminated array of SRV replies: every element first, then
 * the array itself. Passing NULL is allowed and does nothing. */
void srv_free(struct ns_srv_reply **srv)
{
	struct ns_srv_reply **cur;

	if (srv == NULL) {
		return;
	}

	for (cur = srv; *cur; cur++) {
		g_free(*cur);
	}
	g_free(srv);
}
550
551	char word_wrap(const char msg, int line_len)
552	{
553	GString *ret = g_string_sized_new(strlen(msg) + 16);
554
555	while (strlen(msg) > line_len) {
556	int i;
557
558	/* First try to find out if there's a newline already. Don't
559	want to add more splits than necessary. */
560	for (i = line_len; i > 0 && msg[i] != '\n'; i--) {
561	;
562	}
563	if (msg[i] == '\n') {
564	g_string_append_len(ret, msg, i + 1);
565	msg += i + 1;
566	continue;
567	}
568
569	for (i = line_len; i > 0; i--) {
570	if (msg[i] == '-') {
571	g_string_append_len(ret, msg, i + 1);
572	g_string_append_c(ret, '\n');
573	msg += i + 1;
574	break;
575	} else if (msg[i] == ' ') {
576	g_string_append_len(ret, msg, i);
577	g_string_append_c(ret, '\n');
578	msg += i + 1;
579	break;
580	}
581	}
582	if (i == 0) {
583	const char *end;
584	size_t len;
585
586	g_utf8_validate(msg, line_len, &end);
587
588	len = (end != msg) ? end - msg : line_len;
589
590	g_string_append_len(ret, msg, len);
591	g_string_append_c(ret, '\n');
592	msg += len;
593	}
594	}
595	g_string_append(ret, msg);
596
597	return g_string_free(ret, FALSE);
598	}
599
600	gboolean ssl_sockerr_again(void *ssl)
601	{
602	if (ssl) {
603	return ssl_errno == SSL_AGAIN;
604	} else {
605	return sockerr_again();
606	}
607	}
608
609	/* Returns values: -1 == Failure (base64-decoded to something unexpected)
610	0 == Okay
611	1 == Password doesn't match the hash. */
612	int md5_verify_password(char password, char hash)
613	{
614	md5_byte_t *pass_dec = NULL;
615	md5_byte_t pass_md5[16];
616	md5_state_t md5_state;
617	int ret = -1, i;
618
619	if (base64_decode(hash, &pass_dec) == 21) {
620	md5_init(&md5_state);
621	md5_append(&md5_state, (md5_byte_t *) password, strlen(password));
622	md5_append(&md5_state, (md5_byte_t ) pass_dec + 16, 5); / Hmmm, salt! */
623	md5_finish(&md5_state, pass_md5);
624
625	for (i = 0; i < 16; i++) {
626	if (pass_dec[i] != pass_md5[i]) {
627	ret = 1;
628	break;
629	}
630	}
631
632	/* If we reached the end of the loop, it was a match! */
633	if (i == 16) {
634	ret = 0;
635	}
636	}
637
638	g_free(pass_dec);
639
640	return ret;
641	}
642
643	/* Split commands (root-style, not IRC-style). Handles "quoting of"
644	white\ space in 'various ways'. Returns a NULL-terminated static
645	char** so watch out with nested use! Definitely not thread-safe. */
646	char *split_command_parts(char command, int limit)
647	{
648	static char *cmd[IRC_MAX_ARGS + 1];
649	char *s, q = 0;
650	int k;
651
652	memset(cmd, 0, sizeof(cmd));
653	cmd[0] = command;
654	k = 1;
655	for (s = command; *s && k < IRC_MAX_ARGS; s++) {
656	if (*s == ' ' && !q) {
657	*s = 0;
658	while (*++s == ' ') {
659	;
660	}
661	if (k != limit && (s == '"' \|\| s == '\'')) {
662	q = *s;
663	s++;
664	}
665	if (*s) {
666	cmd[k++] = s;
667	if (limit && k > limit) {
668	break;
669	}
670	s--;
671	} else {
672	break;
673	}
674	} else if (*s == '\\' && ((!q && s[1]) \|\| (q && q == s[1]))) {
675	char *cpy;
676
677	for (cpy = s; *cpy; cpy++) {
678	cpy[0] = cpy[1];
679	}
680	} else if (*s == q) {
681	q = *s = 0;
682	}
683	}
684
685	/* Full zero-padding for easier argc checking. */
686	while (k <= IRC_MAX_ARGS) {
687	cmd[k++] = NULL;
688	}
689
690	return cmd;
691	}
692
/* Find a header in a block of RFC822-style headers and return its value.
 *
 * text:   the header block (NULL gives NULL)
 * header: header name to look for, compared case-insensitively against the
 *         start of each line
 * len:    number of bytes of text to look at; 0 means strlen(text)
 *
 * Returns a newly allocated copy of the value (the rest of the line after
 * any ':', spaces and tabs), or NULL if the header is not found before the
 * empty line that ends the headers. */
char *get_rfc822_header(const char *text, const char *header, int len)
{
	int hlen = strlen(header), i;
	const char *ret;

	if (text == NULL) {
		return NULL;
	}

	if (len == 0) {
		len = strlen(text);
	}

	i = 0;
	while ((i + hlen) < len) {
		/* Maybe this is a bit over-commented, but I just hate this part... */
		if (g_strncasecmp(text + i, header, hlen) == 0) {
			/* Skip to the (probable) end of the header */
			i += hlen;

			/* Find the first non-[: \t] character */
			while (i < len && (text[i] == ':' || text[i] == ' ' || text[i] == '\t')) {
				i++;
			}

			/* Make sure we're still inside the string */
			if (i >= len) {
				return NULL;
			}

			/* Save the position */
			ret = text + i;

			/* Search for the end of this line */
			while (i < len && text[i] != '\r' && text[i] != '\n') {
				i++;
			}

			/* Copy the found data */
			return g_strndup(ret, text + i - ret);
		}

		/* This wasn't the header we were looking for, skip to the next line. */
		while (i < len && (text[i] != '\r' && text[i] != '\n')) {
			i++;
		}
		while (i < len && (text[i] == '\r' || text[i] == '\n')) {
			i++;
		}

		/* End of headers? */
		if ((i >= 4 && strncmp(text + i - 4, "\r\n\r\n", 4) == 0) ||
		    (i >= 2 && (strncmp(text + i - 2, "\n\n", 2) == 0 ||
		                strncmp(text + i - 2, "\r\r", 2) == 0))) {
			break;
		}
	}

	return NULL;
}
753
754	/* Takes a string, truncates it where it's safe, returns the new length */
755	int truncate_utf8(char *string, int maxlen)
756	{
757	char *end;
758
759	g_utf8_validate((const gchar ) string, maxlen, (const gchar *) &end);
760	*end = '\0';
761	return end - string;
762	}
763
764	/* Parses a guint64 from string, returns TRUE on success */
765	gboolean parse_int64(char string, int base, guint64 number)
766	{
767	guint64 parsed;
768	char *endptr;
769
770	errno = 0;
771	parsed = g_ascii_strtoull(string, &endptr, base);
772	if (errno \|\| endptr == string \|\| *endptr != '\0') {
773	return FALSE;
774	}
775	*number = parsed;
776	return TRUE;
777	}
778
/* Filters all the characters in 'reject' replacing them with 'replacement'.
 * Modifies the string in-place and returns the string itself.
 * For the opposite, use g_strcanon() */
char *str_reject_chars(char *string, const char *reject, char replacement)
{
	char *c = string;

	while (*c) {
		c += strcspn(c, reject);
		if (*c) {
			/* Step past the replaced character: if replacement is
			   itself part of reject, looking at it again would
			   never make progress. */
			*c++ = replacement;
		}
	}

	return string;
}
795
796	/* Returns a string that is exactly 'char_len' utf8 characters long (not bytes),
797	* padded to the right with spaces or truncated with the 'ellipsis' parameter
798	* if specified (can be NULL).
799	* Returns a newly allocated string, or NULL on invalid parameters. */
800	char str_pad_and_truncate(const char string, long char_len, const char *ellipsis)
801	{
802	size_t string_len = strlen(string);
803	size_t ellipsis_len = (ellipsis) ? strlen(ellipsis) : 0;
804	long orig_len = g_utf8_strlen(string, -1);
805
806	g_return_val_if_fail(char_len > ellipsis_len, NULL);
807
808	if (orig_len > char_len) {
809	char *ret = g_malloc(string_len + 1);
810	g_utf8_strncpy(ret, string, char_len - ellipsis_len);
811	if (ellipsis) {
812	g_strlcat(ret, ellipsis, string_len);
813	}
814	return ret;
815	} else if (orig_len < char_len) {
816	return g_strdup_printf("%s%*s", string, (int) (char_len - orig_len), "");
817	} else {
818	return g_strdup(string);
819	}
820	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: