Context Navigation

source: lib/misc.c @ f9110b4

Last change on this file since f9110b4 was 0b3ffb1, checked in by Wilmer van der Gaast <wilmer@…>, at 2010-05-19T19:46:43Z

Setting the HTML flag on a connection has a nasty side effect of escaping
a lot of "special" characters, and these HTML entities are not counted as
one character. :-(

So just strip HTML of incoming stuff and don't do anything with what goes
out. It's not required.

The story may actually be more complicated than this, let's find out.

Property mode set to 100644

File size: 14.0 KB

Line
1	/********************************************************************\
2	* BitlBee -- An IRC to other IM-networks gateway *
3	* *
4	* Copyright 2002-2006 Wilmer van der Gaast and others *
5	\********************************************************************/
6
7	/*
8	* Various utility functions. Some are copied from Gaim to support the
9	* IM-modules, most are from BitlBee.
10	*
11	* Copyright (C) 1998-1999, Mark Spencer <markster@marko.net>
12	* (and possibly other members of the Gaim team)
13	* Copyright 2002-2006 Wilmer van der Gaast <wilmer@gaast.net>
14	*/
15
16	/*
17	This program is free software; you can redistribute it and/or modify
18	it under the terms of the GNU General Public License as published by
19	the Free Software Foundation; either version 2 of the License, or
20	(at your option) any later version.
21
22	This program is distributed in the hope that it will be useful,
23	but WITHOUT ANY WARRANTY; without even the implied warranty of
24	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25	GNU General Public License for more details.
26
27	You should have received a copy of the GNU General Public License with
28	the Debian GNU/Linux distribution in /usr/share/common-licenses/GPL;
29	if not, write to the Free Software Foundation, Inc., 59 Temple Place,
30	Suite 330, Boston, MA 02111-1307 USA
31	*/
32
33	#define BITLBEE_CORE
34	#include "nogaim.h"
35	#include "base64.h"
36	#include "md5.h"
37	#include <stdio.h>
38	#include <stdlib.h>
39	#include <string.h>
40	#include <ctype.h>
41	#include <glib.h>
42	#include <time.h>
43
44	#ifdef HAVE_RESOLV_A
45	#include <arpa/nameser.h>
46	#include <resolv.h>
47	#endif
48
49	#include "md5.h"
50	#include "ssl_client.h"
51
52	void strip_linefeed(gchar *text)
53	{
54	int i, j;
55	gchar *text2 = g_malloc(strlen(text) + 1);
56
57	for (i = 0, j = 0; text[i]; i++)
58	if (text[i] != '\r')
59	text2[j++] = text[i];
60	text2[j] = '\0';
61
62	strcpy(text, text2);
63	g_free(text2);
64	}
65
/* Turn calendar fields into a time_t using mktime().

   year is the full year (e.g. 2010), month is 1-12. A negative sec means
   "use the seconds of the current wall-clock minute". All remaining
   struct tm members (including tm_isdst) are passed to mktime() as zero. */
time_t get_time(int year, int month, int day, int hour, int min, int sec)
{
	struct tm when = {
		.tm_year = year - 1900,
		.tm_mon  = month - 1,
		.tm_mday = day,
		.tm_hour = hour,
		.tm_min  = min,
		.tm_sec  = sec >= 0 ? sec : time(NULL) % 60,
	};

	return mktime(&when);
}
80
/* Like mktime(), but treats the broken-down time in *tp as UTC instead of
   local time. *tp is normalised by mktime() and its tm_isdst is reset to -1.

   mktime() can only interpret its argument as local time, so the result
   is off by the local UTC offset. Rather than touching the TZ environment
   variable (kludgy) or relying on timegm() (not portable), the offset is
   measured by converting the first guess back with gmtime_r(), feeding
   that to mktime() again, and applying the difference.

   This is a hack and can be wrong around DST changes. */
time_t mktime_utc( struct tm *tp )
{
	struct tm as_utc;
	time_t guess, round_trip;

	tp->tm_isdst = -1;
	guess = mktime( tp );

	gmtime_r( &guess, &as_utc );
	as_utc.tm_isdst = -1;

	/* If the UTC breakdown of the guess does not show the requested
	   hour and minute, local time is not UTC and the guess has to be
	   shifted by the measured offset. */
	if( as_utc.tm_hour != tp->tm_hour || as_utc.tm_min != tp->tm_min )
	{
		round_trip = mktime( &as_utc );
		guess += guess - round_trip;
	}

	return guess;
}
115
/* One named HTML character entity and the text it expands to. */
typedef struct htmlentity
{
	char code[7];	/* Entity name without the '&' and ';' (6 chars + NUL). */
	char is[3];	/* Replacement text (at most 2 bytes + NUL). */
} htmlentity_t;

/* Entities understood by strip_html(). The list ends with an entry whose
   code is the empty string. */
static const htmlentity_t ent[] =
{
	{ "lt",     "<" },
	{ "gt",     ">" },
	{ "amp",    "&" },
	{ "apos",   "'" },
	{ "quot",   "\"" },
	{ "aacute", "á" },
	{ "eacute", "é" },
	{ "iacute", "í" },
	{ "oacute", "ó" },
	{ "uacute", "ú" },
	{ "agrave", "à" },
	{ "egrave", "è" },
	{ "igrave", "ì" },
	{ "ograve", "ò" },
	{ "ugrave", "ù" },
	{ "acirc",  "â" },
	{ "ecirc",  "ê" },
	{ "icirc",  "î" },
	{ "ocirc",  "ô" },
	{ "ucirc",  "û" },
	{ "auml",   "ä" },
	{ "euml",   "ë" },
	{ "iuml",   "ï" },
	{ "ouml",   "ö" },
	{ "uuml",   "ü" },
	{ "nbsp",   " " },
	{ "",       "" }
};
152
153	void strip_html( char *in )
154	{
155	char *start = in;
156	char out[strlen(in)+1];
157	char s = out, cs;
158	int i, matched;
159
160	memset( out, 0, sizeof( out ) );
161
162	while( *in )
163	{
164	if( in == '<' && ( isalpha( (in+1) ) \|\| *(in+1) == '/' ) )
165	{
166	/* If in points at a < and in+1 points at a letter or a slash, this is probably
167	a HTML-tag. Try to find a closing > and continue there. If the > can't be
168	found, assume that it wasn't a HTML-tag after all. */
169
170	cs = in;
171
172	while( in && in != '>' )
173	in ++;
174
175	if( *in )
176	{
177	if( g_strncasecmp( cs+1, "br", 2) == 0 )
178	*(s++) = '\n';
179	in ++;
180	}
181	else
182	{
183	in = cs;
184	(s++) = (in++);
185	}
186	}
187	else if( *in == '&' )
188	{
189	cs = ++in;
190	while( in && isalpha( in ) )
191	in ++;
192
193	if( *in == ';' ) in ++;
194	matched = 0;
195
196	for( i = 0; *ent[i].code; i ++ )
197	if( g_strncasecmp( ent[i].code, cs, strlen( ent[i].code ) ) == 0 )
198	{
199	int j;
200
201	for( j = 0; ent[i].is[j]; j ++ )
202	*(s++) = ent[i].is[j];
203
204	matched = 1;
205	break;
206	}
207
208	/* None of the entities were matched, so return the string */
209	if( !matched )
210	{
211	in = cs - 1;
212	(s++) = (in++);
213	}
214	}
215	else
216	{
217	(s++) = (in++);
218	}
219	}
220
221	strcpy( start, out );
222	}
223
224	char escape_html( const char html )
225	{
226	const char *c = html;
227	GString *ret;
228	char *str;
229
230	if( html == NULL )
231	return( NULL );
232
233	ret = g_string_new( "" );
234
235	while( *c )
236	{
237	switch( *c )
238	{
239	case '&':
240	ret = g_string_append( ret, "&" );
241	break;
242	case '<':
243	ret = g_string_append( ret, "<" );
244	break;
245	case '>':
246	ret = g_string_append( ret, ">" );
247	break;
248	case '"':
249	ret = g_string_append( ret, """ );
250	break;
251	default:
252	ret = g_string_append_c( ret, *c );
253	}
254	c ++;
255	}
256
257	str = ret->str;
258	g_string_free( ret, FALSE );
259	return( str );
260	}
261
/* Decode%20a%20file%20name

   Replaces every "%XX" escape (two hexadecimal digits) in s by the byte
   it encodes. The string is modified in place; the result is never longer
   than the input.

   If a '%' is not followed by exactly two hex digits (this includes a
   '%' at the very end of the string) the input is considered invalid and
   s is set to the empty string. */
void http_decode( char *s )
{
	char *in, *out;
	char hex[3];

	for( in = out = s; *in; in ++, out ++ )
	{
		if( *in != '%' )
		{
			*out = *in;
			continue;
		}

		/* Check both digits before touching them. The short-circuit
		   keeps us from reading past the terminating NUL. */
		if( !isxdigit( (unsigned char) in[1] ) || !isxdigit( (unsigned char) in[2] ) )
		{
			*s = 0;
			return;
		}

		hex[0] = in[1];
		hex[1] = in[2];
		hex[2] = 0;
		*out = (char) strtol( hex, NULL, 16 );
		in += 2;
	}
	*out = 0;
}
295
/* Tells whether c may appear unescaped in http_encode() output. */
static int http_encode_is_safe( unsigned char c )
{
	return isalnum( c ) || ( c != 0 && strchr( "._-~", c ) != NULL );
}

/* Warning: This one explodes the string. Worst-cases can make the string 3x its original size! */
/* This function is safe, but make sure you call it safely as well: the
   buffer behind s must have room for 3 * strlen( s ) + 1 bytes.

   Every byte that is not alphanumeric and not one of "._-~" is replaced
   by "%XX" (upper-case hex). The string is rewritten in place. */
void http_encode( char *s )
{
	static const char hexdigits[] = "0123456789ABCDEF";
	size_t len, extra = 0, i, j;

	len = strlen( s );
	for( i = 0; i < len; i ++ )
		if( !http_encode_is_safe( (unsigned char) s[i] ) )
			extra += 2;

	if( extra == 0 )
		return;

	/* Fill the result from the back. The write position never drops
	   below the read position, so unread input is not clobbered. */
	i = len;
	j = len + extra;
	s[j] = 0;

	while( i > 0 )
	{
		unsigned char c = (unsigned char) s[--i];

		if( http_encode_is_safe( c ) )
		{
			s[--j] = (char) c;
		}
		else
		{
			s[--j] = hexdigits[c & 0x0f];
			s[--j] = hexdigits[c >> 4];
			s[--j] = '%';
		}
	}
}
321
/* Strip newlines from a string. Modifies the string passed to it: every
   '\n' and '\r' is overwritten with a space, so the length does not change.
   Returns source itself. */
char *strip_newlines( char *source )
{
	char *p;

	for( p = source; *p != '\0'; p ++ )
		if( *p == '\n' || *p == '\r' )
			*p = ' ';

	return source;
}
333
/* Wrap an IPv4 address into IPv6 space. Not thread-safe...

   If src consists of digits and dots only, "::ffff:" + src is written to
   a static buffer and that buffer is returned; it is overwritten by the
   next call. Anything else is returned unchanged. */
char *ipv6_wrap( char *src )
{
	static char dst[64];

	/* Hmm, it's not even an IP... */
	if( src[strspn( src, "0123456789." )] )
		return src;

	g_snprintf( dst, sizeof( dst ), "::ffff:%s", src );

	return dst;
}
352
/* Unwrap an IPv4 address from IPv6 space. Thread-safe, because it's very simple. :-)

   If src is "::ffff:" (case-insensitive) followed by digits and dots
   only, a pointer to the part after the prefix is returned; it points
   into src, nothing is copied. Anything else is returned unchanged. */
char *ipv6_unwrap( char *src )
{
	if( g_strncasecmp( src, "::ffff:", 7 ) != 0 )
		return src;

	/* Hmm, it's not even an IP... */
	if( src[7 + strspn( src + 7, "0123456789." )] )
		return src;

	return ( src + 7 );
}
371
372	/* Convert from one charset to another.
373
374	from_cs, to_cs: Source and destination charsets
375	src, dst: Source and destination strings
376	size: Size if src. 0 == use strlen(). strlen() is not reliable for UNICODE/UTF16 strings though.
377	maxbuf: Maximum number of bytes to write to dst
378
379	Returns the number of bytes written to maxbuf or -1 on an error.
380	*/
381	signed int do_iconv( char from_cs, char to_cs, char src, char dst, size_t size, size_t maxbuf )
382	{
383	GIConv cd;
384	size_t res;
385	size_t inbytesleft, outbytesleft;
386	char *inbuf = src;
387	char *outbuf = dst;
388
389	cd = g_iconv_open( to_cs, from_cs );
390	if( cd == (GIConv) -1 )
391	return( -1 );
392
393	inbytesleft = size ? size : strlen( src );
394	outbytesleft = maxbuf - 1;
395	res = g_iconv( cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft );
396	*outbuf = '\0';
397	g_iconv_close( cd );
398
399	if( res == (size_t) -1 )
400	return( -1 );
401	else
402	return( outbuf - dst );
403	}
404
#ifndef _WIN32
/* Try to read exactly count bytes from the device at path without
   blocking. Returns 1 on success and 0 if the device could not be opened
   or delivered fewer bytes. */
static int random_bytes_from_dev( const char *path, unsigned char *buf, int count )
{
	int fd, ok;

	if( ( fd = open( path, O_RDONLY | O_NONBLOCK ) ) < 0 )
		return 0;

	ok = ( read( fd, buf, count ) == count );
	close( fd );

	return ok;
}
#endif

/* A pretty reliable random number generator. Tries to use the /dev/random
   devices first, and falls back to the random number generator from libc
   when it fails. Opens randomizer devices with O_NONBLOCK to make sure a
   lack of entropy won't halt BitlBee.

   Fills buf with count bytes. */
void random_bytes( unsigned char *buf, int count )
{
	int i;
#ifndef _WIN32
	static int use_dev = -1;

	/* Actually this probing code isn't really necessary, is it? */
	if( use_dev == -1 )
	{
		if( access( "/dev/random", R_OK ) == 0 || access( "/dev/urandom", R_OK ) == 0 )
			use_dev = 1;
		else
		{
			use_dev = 0;
			srand( ( getpid() << 16 ) ^ time( NULL ) );
		}
	}

	if( use_dev )
	{
		/* At least on Linux, /dev/random can block if there's not
		   enough entropy. We really don't want that, so if it can't
		   give anything, use /dev/urandom instead. urandom isn't
		   supposed to block at all, but just to be sure it is
		   opened non-blocking as well. */
		if( random_bytes_from_dev( "/dev/random", buf, count ) ||
		    random_bytes_from_dev( "/dev/urandom", buf, count ) )
			return;

		/* If /dev/random blocks once, we'll still try to use it
		   again next time. If /dev/urandom also fails for some
		   reason, stick with libc during this session. */
		use_dev = 0;
		srand( ( getpid() << 16 ) ^ time( NULL ) );
	}
#endif

	/* Only reached when the devices are unavailable (or on Windows).
	   Possibly the LSB of rand() isn't very random on some
	   platforms. Seems okay on at least Linux and OSX though. */
	for( i = 0; i < count; i ++ )
		buf[i] = rand() & 0xff;
}
471
/* Check whether value is something bool2int() can interpret: one of the
   words true/yes/on/false/no/off (case-insensitive) or a non-empty string
   of decimal digits. Returns 1 if so, 0 otherwise. */
int is_bool( char *value )
{
	if( *value == 0 )
		return 0;

	if( ( g_strcasecmp( value, "true" ) == 0 ) || ( g_strcasecmp( value, "yes" ) == 0 ) || ( g_strcasecmp( value, "on" ) == 0 ) )
		return 1;
	if( ( g_strcasecmp( value, "false" ) == 0 ) || ( g_strcasecmp( value, "no" ) == 0 ) || ( g_strcasecmp( value, "off" ) == 0 ) )
		return 1;

	/* The cast matters: passing a negative char to isdigit() is
	   undefined behaviour. */
	while( *value )
		if( !isdigit( (unsigned char) *value ) )
			return 0;
		else
			value ++;

	return 1;
}
490
/* Convert a textual boolean to an int. true/yes/on give 1, false/no/off
   give 0 (both case-insensitive). Otherwise the leading integer parsed by
   sscanf( "%d" ) is returned, or 0 if there is none. */
int bool2int( char *value )
{
	static const char *const yes_words[] = { "true", "yes", "on", NULL };
	static const char *const no_words[] = { "false", "no", "off", NULL };
	const char *const *word;
	int number;

	for( word = yes_words; *word; word ++ )
		if( g_strcasecmp( value, *word ) == 0 )
			return 1;

	for( word = no_words; *word; word ++ )
		if( g_strcasecmp( value, *word ) == 0 )
			return 0;

	return sscanf( value, "%d", &number ) == 1 ? number : 0;
}
505
/* Look up the SRV record "_service._protocol.domain" and return the
   first answer as a g_malloc()ed struct ns_srv_reply (release it with
   g_free()). Returns NULL if the lookup fails, the answer cannot be
   parsed, or BitlBee was built without HAVE_RESOLV_A.

   NOTE(review): the target name is walked as plain length-prefixed
   labels; DNS name compression pointers are not handled - confirm
   whether that matters for the resolvers in use. */
struct ns_srv_reply *srv_lookup( char *service, char *protocol, char *domain )
{
	struct ns_srv_reply *reply = NULL;
#ifdef HAVE_RESOLV_A
	char name[1024];
	unsigned char querybuf[1024];
	const unsigned char *buf;
	ns_msg nsh;
	ns_rr rr;
	int i, len, size;

	g_snprintf( name, sizeof( name ), "_%s._%s.%s", service, protocol, domain );

	if( ( size = res_query( name, ns_c_in, ns_t_srv, querybuf, sizeof( querybuf ) ) ) <= 0 )
		return NULL;

	if( ns_initparse( querybuf, size, &nsh ) != 0 )
		return NULL;

	if( ns_parserr( &nsh, ns_s_an, 0, &rr ) != 0 )
		return NULL;

	size = ns_rr_rdlen( rr );
	buf = ns_rr_rdata( rr );

	/* RDATA layout: priority, weight and port (2 bytes each), then the
	   target name. Anything shorter than 7 bytes cannot hold a name. */
	if( size < 7 )
		return NULL;

	/* Walk the labels (length byte + text) up to the terminating
	   zero-length label to find out how long the name is. */
	len = 0;
	for( i = 6; i < size && buf[i]; i += buf[i] + 1 )
		len += buf[i] + 1;

	/* i >= size: no terminator inside the RDATA, so copying would read
	   past its end. len == 0: empty target, nothing to connect to. */
	if( i >= size || len == 0 )
		return NULL;

	/* Copy everything after the first length byte. The last of the len
	   bytes is the terminating zero label, which doubles as the NUL. */
	reply = g_malloc( sizeof( struct ns_srv_reply ) + len );
	memcpy( reply->name, buf + 7, len );

	/* Turn the remaining length bytes into dots. */
	for( i = buf[6]; i < len && buf[7+i]; i += buf[7+i] + 1 )
		reply->name[i] = '.';

	if( i > len )
	{
		g_free( reply );
		return NULL;
	}

	reply->prio = ( buf[0] << 8 ) | buf[1];
	reply->weight = ( buf[2] << 8 ) | buf[3];
	reply->port = ( buf[4] << 8 ) | buf[5];
#endif

	return reply;
}
557
558	/* Word wrapping. Yes, I know this isn't UTF-8 clean. I'm willing to take the risk. */
559	char word_wrap( const char msg, int line_len )
560	{
561	GString *ret = g_string_sized_new( strlen( msg ) + 16 );
562
563	while( strlen( msg ) > line_len )
564	{
565	int i;
566
567	/* First try to find out if there's a newline already. Don't
568	want to add more splits than necessary. */
569	for( i = line_len; i > 0 && msg[i] != '\n'; i -- );
570	if( msg[i] == '\n' )
571	{
572	g_string_append_len( ret, msg, i + 1 );
573	msg += i + 1;
574	continue;
575	}
576
577	for( i = line_len; i > 0; i -- )
578	{
579	if( msg[i] == '-' )
580	{
581	g_string_append_len( ret, msg, i + 1 );
582	g_string_append_c( ret, '\n' );
583	msg += i + 1;
584	break;
585	}
586	else if( msg[i] == ' ' )
587	{
588	g_string_append_len( ret, msg, i );
589	g_string_append_c( ret, '\n' );
590	msg += i + 1;
591	break;
592	}
593	}
594	if( i == 0 )
595	{
596	g_string_append_len( ret, msg, line_len );
597	g_string_append_c( ret, '\n' );
598	msg += line_len;
599	}
600	}
601	g_string_append( ret, msg );
602
603	return g_string_free( ret, FALSE );
604	}
605
606	gboolean ssl_sockerr_again( void *ssl )
607	{
608	if( ssl )
609	return ssl_errno == SSL_AGAIN;
610	else
611	return sockerr_again();
612	}
613
614	/* Returns values: -1 == Failure (base64-decoded to something unexpected)
615	0 == Okay
616	1 == Password doesn't match the hash. */
617	int md5_verify_password( char password, char hash )
618	{
619	md5_byte_t *pass_dec = NULL;
620	md5_byte_t pass_md5[16];
621	md5_state_t md5_state;
622	int ret = -1, i;
623
624	if( base64_decode( hash, &pass_dec ) == 21 )
625	{
626	md5_init( &md5_state );
627	md5_append( &md5_state, (md5_byte_t*) password, strlen( password ) );
628	md5_append( &md5_state, (md5_byte_t) pass_dec + 16, 5 ); / Hmmm, salt! */
629	md5_finish( &md5_state, pass_md5 );
630
631	for( i = 0; i < 16; i ++ )
632	{
633	if( pass_dec[i] != pass_md5[i] )
634	{
635	ret = 1;
636	break;
637	}
638	}
639
640	/* If we reached the end of the loop, it was a match! */
641	if( i == 16 )
642	ret = 0;
643	}
644
645	g_free( pass_dec );
646
647	return ret;
648	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: lib/misc.c @ f9110b4

Download in other formats: