@@ -35,6 +35,10 @@ Usage: %s [options] <selector> <mode> [mode argument]
3535 may be one of { data, text, attr }:
3636 data - return raw html of matching elements
3737 text - return inner text of matching elements
38+ [mode argument: formatting]
39+ supported modes: { plain, ansi, md }
40+ default: plain
41+ for plain, ANSI, or markdown formatted output respectively
3842 attr - return attribute value of matching elements
3943 <mode argument: attr>
4044 attribute to return
@@ -44,6 +48,14 @@ Usage: %s [options] <selector> <mode> [mode argument]
4448 curl -sSL https://example.com | %s a attr href
4549)" ;
4650
51+ static const string afmt_s = " \033 [" ;
52+ static const string afmt_e = " m" ;
53+ static const vector<char > collapsible = {' ' , ' \t ' , ' \n ' , ' \r ' };
54+ static const vector<unsigned long > breaking = {
55+ MyHTML_TAG_BR,
56+ MyHTML_TAG_P
57+ };
58+
// Named boolean switches shared across the program.
//   dirtyargs - tested and reset to false inside parseopts(); presumably raised by an
//               option handler that consumed or altered argv (the setter is not in this view)
static std::map<const std::string, bool> flags{
	{"dirtyargs", false}
};
@@ -86,10 +98,18 @@ bool readfile(string filename, string &target){
8698 return true ;
8799}
88100
// Linear membership test: true when `vec` holds an element equal to `val`.
//
// T is deduced from the vector only. `val` is declared through the vector's value_type,
// which is a non-deduced context, so any argument implicitly convertible to T (an int
// literal, an enum constant, a size_t-like typedef checked against vector<unsigned long>)
// is accepted instead of causing a template-deduction conflict. Taking it by const
// reference also avoids copying non-trivial element types.
template <typename T> inline bool vec_has(const std::vector<T> &vec, const typename std::vector<T>::value_type &val){
	return std::find(vec.begin(), vec.end(), val) != vec.end();
}
92104
105+ template <typename T> inline bool node_in (myhtml_tree_node_t * node, T tag){
106+ while (node){
107+ if (node->tag_id == tag) return true ;
108+ node = node->parent ;
109+ }
110+ return false ;
111+ }
112+
93113static map<const char , const string> option_longopts = { // maps shortopts to longopts from option_handlers
94114 {' h' , " help" },
95115 {' f' , " file" },
@@ -115,6 +135,78 @@ static map<const string, const function<void(int&, const char**&)>> option_handl
115135 }}
116136};
117137
138+ static pair<const function<void (myhtml_tree_node_t *, string&)>, const function<void (myhtml_tree_node_t *, string&)>> format_handlers = { // {format, unformat}
139+ [](myhtml_tree_node_t * node_iter, string &rendered){
140+ if (state[" modearg" ].length () > 0 ){
141+ const bool ansi = state[" modearg" ] == " ansi" ;
142+ const bool md = state[" modearg" ] == " md" ;
143+ switch (node_iter->tag_id ){ // modearg formatters
144+ case MyHTML_TAG_B: // bold on
145+ case MyHTML_TAG_STRONG:
146+ if (ansi) rendered += afmt_s + " 1" + afmt_e;
147+ if (md) rendered += " **" ;
148+ break ;
149+ case MyHTML_TAG_I: // italics on
150+ case MyHTML_TAG_U:
151+ case MyHTML_TAG_EM:
152+ if (ansi) rendered += afmt_s + " 4" + afmt_e;
153+ if (md) rendered += " _" ;
154+ break ;
155+ case MyHTML_TAG_CODE: // code on
156+ if (node_in (node_iter, MyHTML_TAG_PRE)){
157+ rendered += " ```\n " ;
158+ }else {
159+ if (ansi) rendered += afmt_s + " 7" + afmt_e;
160+ if (md) rendered += " `" ;
161+ }
162+ break ;
163+ }
164+ }
165+ switch (node_iter->tag_id ){ // global formatters
166+ case MyHTML_TAG_LI:
167+ rendered += " - " ;
168+ break ;
169+ }
170+ },
171+ [](myhtml_tree_node_t * node_iter, string &rendered){
172+ if (state[" modearg" ].length () > 0 ){
173+ const bool ansi = state[" modearg" ] == " ansi" ;
174+ const bool md = state[" modearg" ] == " md" ;
175+ switch (node_iter->tag_id ){ // modearg unformatters
176+ case MyHTML_TAG_B: // bold off
177+ case MyHTML_TAG_STRONG:
178+ if (ansi) rendered += afmt_s + " 21" + afmt_e;
179+ if (md) rendered += " **" ;
180+ break ;
181+ case MyHTML_TAG_I: // italics off
182+ case MyHTML_TAG_U:
183+ case MyHTML_TAG_EM:
184+ if (ansi) rendered += afmt_s + " 24" + afmt_e; // no italics here :(
185+ if (md) rendered += " _" ;
186+ break ;
187+ case MyHTML_TAG_CODE: // code off
188+ if (node_in (node_iter, MyHTML_TAG_PRE)){
189+ rendered += " ```\n " ;
190+ }else {
191+ if (ansi) rendered += afmt_s + " 27" + afmt_e;
192+ if (md) rendered += " `" ;
193+ }
194+ break ;
195+ }
196+ }
197+ switch (node_iter->tag_id ){ // global unformatters
198+ case MyHTML_TAG_LI:
199+ case MyHTML_TAG_UL:
200+ rendered += " \n " ;
201+ break ;
202+ }
203+
204+ if (vec_has (breaking, node_iter->tag_id )){ // <br/>
205+ rendered += " \n " ;
206+ }
207+ }
208+ };
209+
118210static map<const string, const function<void (myhtml_tree_node_t *)>> mode_handlers = { // maps modes to functions
119211 {" data" , [](myhtml_tree_node_t * node) {
120212 myhtml_serialization_tree_callback (node, [](const char * data, size_t len, void * ctx) -> unsigned int {
@@ -127,42 +219,41 @@ static map<const string, const function<void(myhtml_tree_node_t*)>> mode_handler
127219 {" text" , [](myhtml_tree_node_t * node) {
128220 string rendered = " " ;
129221
130- static vector<char > collapsible = {' ' , ' \t ' , ' \n ' , ' \r ' };
131- static vector<unsigned long > breaking = {
132- MyHTML_TAG_BR,
133- MyHTML_TAG_P
134- };
135-
136222 myhtml_tree_node_t * node_iter = node->child ;
137223 while (node_iter){
138224 const char * text_c = myhtml_node_text (node_iter, nullptr );
139225 string text = " " ;
140226 if (text_c != nullptr ) text += text_c;
141227
142228 if (node_iter->tag_id == MyHTML_TAG__TEXT){
143- // collapse whitespace to single character
144- string::iterator nend = unique (text.begin (), text.end (), [](char c1, char c2) -> bool {
145- return vec_has (collapsible, c1) && vec_has (collapsible, c2);
146- });
147- text.resize (static_cast <unsigned long >(nend-text.begin ()));
148-
149- // replace whitespace with space
150- replace_if (text.begin (), text.end (), [](char c) -> bool {
151- return vec_has (collapsible, c);
152- }, ' ' );
229+ if (!node_in (node_iter, MyHTML_TAG_PRE)){
230+ // collapse whitespace to single character
231+ string::iterator nend = unique (text.begin (), text.end (), [](char c1, char c2) -> bool {
232+ return vec_has (collapsible, c1) && vec_has (collapsible, c2);
233+ });
234+ text.resize (static_cast <unsigned long >(nend-text.begin ()));
235+
236+ // replace whitespace with space
237+ replace_if (text.begin (), text.end (), [](char c) -> bool {
238+ return vec_has (collapsible, c);
239+ }, ' ' );
240+ }
153241
154242 rendered += text;
243+ }else {
244+ format_handlers.first (node_iter, rendered);
155245 }
156246
157247 if (node_iter->child ) node_iter = node_iter->child ;
158248 else {
159- while (node_iter != node && node_iter->next == nullptr ) node_iter = node_iter-> parent ;
160- if (node_iter == node) break ;
249+ while (node_iter != node && node_iter->next == nullptr ){
250+ format_handlers. second (node_iter, rendered) ;
161251
162- if (vec_has (breaking, node_iter->tag_id )){ // <br/>
163- rendered += " \n " ;
252+ node_iter = node_iter->parent ;
164253 }
254+ if (node_iter == node) break ;
165255
256+ format_handlers.second (node_iter, rendered);
166257 node_iter = node_iter->next ;
167258 }
168259 }
@@ -228,7 +319,7 @@ void parseopts(int &argc, const char** &argv){
228319 cerr << " invalid short option '-" << argv[1 ][0 ] << " '" << endl;
229320 exit (EXIT_FAILURE);
230321 }
231- if (flags[" dirtyargs" ]){
322+ if (flags[" dirtyargs" ]){ // option handler touched argv (args?); skip
232323 flags[" dirtyargs" ] = false ;
233324 break ;
234325 }
0 commit comments